///This do-file cleans up the raw data from the student population experiment.
///It produces the cleaned data that is used for a graph in Figure4.R.

* --- Session setup and data load ---------------------------------------------
* Turn off output paging so the do-file runs straight through.
set more off

* Move to the hard-coded replication folder when it exists. The path is
* specific to one machine, so a failed cd is tolerated: the do-file then
* continues from the current working directory, which must contain
* Figure_4_raw.dta (otherwise the use command below stops with an error).
capture cd "C:\Users\tabi\Dropbox\BonillaLlloydMo\Replication Files\Data Files"
if _rc {
    display as text "note: hard-coded data folder not found; using current directory " c(pwd)
}

* Load the raw data, discarding whatever is currently in memory.
use "Figure_4_raw.dta", clear

***********************************************
*** Demographics and "Political" Variables ****
***********************************************

* Age
* age2 = (age - 17)/13; this spans 0 to 1 only if age runs from 17 to 30
* (check the tabulation of age).
tabulate age
gen age2 = (age - 17) / 13
tabulate age2

* Gender
* gender is a string; encode turns it into the numeric gender2.
* female copies gender2 with code 2 moved to 0, then gets 0/1 value labels.
* NOTE(review): this presumes encode assigned 1 = Female and 2 = Male
* (alphabetical order of the strings) -- confirm against the tabulation.
tabulate gender, nolabel
encode gender, gen(gender2)
gen female = gender2
replace female = 0 if female == 2
tabulate female
label define female 0 "Male" 1 "Female"
label values female female

* Race
* ethnicity is the numeric encoding of the string variable race.
tabulate race
encode race, gen(ethnicity)
tabulate ethnicity

* white is 1 when race is exactly "White" and 0 for every other observation,
* including any observation whose race string is blank.
gen white = (race == "White")


* Religion
* religion2 is the numeric encoding of the string variable religion.
encode religion, generate(religion2)
tab religion2

* religiosity: 1 for encoded categories below 8, 0 for categories 8 and above.
* NOTE(review): the cut-off at 8 depends on the order encode gave the
* categories -- confirm against the tabulation above.
* Stata treats missing as larger than any number, so "religion2 >= 8" on its
* own would also be true for respondents with no religion answer and would
* code them 0. The missing() guard leaves religiosity missing for them.
gen religiosity = 0 if religion2 >= 8 & !missing(religion2)
replace religiosity = 1 if religion2 < 8

* Graduation year
* graduation is the numeric encoding of the string variable gradyear.
tabulate gradyear
encode gradyear, gen(graduation)
tabulate graduation

* GPA
tabulate gpa
* A value recorded as 303 is replaced by 3.03 (presumably a missing decimal
* point in the raw entry).
replace gpa = 3.03 if gpa == 303
* Remove the gpa2 already present in the data so it can be rebuilt below
* (this command stops with an error if gpa2 does not exist).
drop gpa2
* gpa2 = (gpa - 2)/3; this spans 0 to 1 only if gpa runs from 2 to 5
* (check the tabulation of gpa).
gen gpa2 = (gpa - 2) / 3
tabulate gpa2

* Household income, first question
* fifty_income is the numeric encoding of the string variable hhincome.
tabulate hhincome
encode hhincome, gen(fifty_income)

* Household income, follow-up questions
* hhincome2 and hhincome3 are strings; encode them to numeric variables.
encode hhincome2, gen(hhincome22)
encode hhincome3, gen(hhincome32)

* Reverse the hhincome22 codes (x becomes 6 - x). The value label that encode
* attached to hhincome22 no longer matches the codes after this step.
tabulate hhincome22, nolabel
replace hhincome22 = 6 - hhincome22

* Move the hhincome32 codes onto 6-9 so they continue the hhincome22 scale.
* NOTE(review): the mapping depends on the order encode gave the categories;
* confirm against the two tabulations.
tabulate hhincome32
tabulate hhincome32, nolabel
recode hhincome32 (3 = 6) (4 = 7) (1 = 8) (2 = 9)

* Combined scale: start from hhincome22 and fill in from hhincome32 wherever
* hhincome22 is missing. hhincome322 is a plain copy of the recoded hhincome32.
gen household_income = hhincome22

gen hhincome322 = hhincome32
replace household_income = hhincome32 if household_income == .
tabulate household_income
label define household_income  1 "Under $10,000" 2 "$10,000 to $19,999" 3 "$20,000 to $29,999" 4 "$30,000 to $39,999" 5 "$40,000 to $49,999" 6 "$50,000 to $74,999" 7 "$75,000 to $99,999" 8 "$100,000 to $149,999" 9 "$150,000 or more"
label values household_income household_income

* householdincome maps the 1-9 scale onto 0-1.
gen householdincome = (household_income - 1) / 8

* Party ID
tabulate pID
* Code 0 is treated as missing; codes 1-4 get party names.
replace pID = . if pID == 0
label define pID 1 "Democrat" 2 "Republican" 3 "Independent" 4 "Other"
label values pID pID

tabulate rep1
tabulate dem1

* Democrat-Republican strength scale, partisans only.
* Starts from dem1 (labelled below as 1 = strong, 2 = weak Democrat), then
* rep1 == 2 becomes 3 (weak Republican) and rep1 == 1 becomes 4 (strong
* Republican).
gen dem_rep = dem1
replace dem_rep = 3 if rep1 == 2
replace dem_rep = 4 if rep1 == 1
label define dem_rep 1 "Strong Democrat" 2 "Weak Democrat" 3 "Weak Republican" 4 "Strong Republican"
label values dem_rep dem_rep

* Seven-point party scale for all respondents
tabulate ind1

* Start from dem1 (1-2), place independents at 3-5 using ind1, and place
* Republicans at 6-7 using rep1. Later replace commands override earlier ones
* if an observation matches more than one condition.
gen party_scale = dem1
replace party_scale = 3 if ind1 == 2
replace party_scale = 4 if ind1 == 3
replace party_scale = 5 if ind1 == 1
replace party_scale = 6 if rep1 == 2
replace party_scale = 7 if rep1 == 1

label define party_scale 1 "Strong Democrat" 2 "Weak Democrat" 3 "Independent-Democrat" 4 "Independent-Neutral" 5 "Independent-Republican" 6 "Weak Republican" 7 "Strong Republican"
label values party_scale party_scale

* partyscale maps the 1-7 scale onto 0-1.
gen partyscale = (party_scale - 1) / 6
tabulate partyscale

* Left-right self-placement
* ideology is the numeric encoding of the string variable ideo1.
encode ideo1, gen(ideology)
tabulate ideology

* Encoded category 8 is set to missing.
* NOTE(review): presumably a non-substantive answer -- confirm which label
* received code 8 in the tabulation above.
replace ideology = . if ideology == 8
tabulate ideology

* ideology2 maps the remaining 1-7 codes onto 0-1.
gen ideology2 = (ideology - 1) / 6
tabulate ideology2

* Cross-tabulation and correlation of party scale and ideology.
tabulate partyscale ideology2
correlate partyscale ideology2

* News consumption
* news is the numeric encoding of the string variable polnews.
tabulate polnews
encode polnews, gen(news)
tabulate news

* Reorder the encoded codes so they match the frequency labels defined below.
* The value label that encode attached to news no longer matches afterwards;
* news2 carries the corrected labels.
* NOTE(review): the mapping depends on the order encode gave the categories.
recode news (4 = 1) (1 = 2) (2 = 3) (3 = 4)
gen news2 = news
label define news2 1 "Hardly ever" 2 "A few times a month" 3 "A few times a week" 4 "Every day"
label values news2 news2

* polnews2 maps the 1-4 scale onto 0-1.
gen polnews2 = (news2 - 1) / 3
tabulate polnews2

******* Political knowledge questions *******
* Every item ends up as 1 for one designated code, 0 for any other answer,
* and stays missing where the answer is missing.
* NOTE(review): for the encoded items the designated code depends on the
* order encode gave the answer strings -- confirm each against its tabulation.

* Presidential term limit (code 2 scored 1)
tabulate pk1
recode pk1 (2 = 1) (. = .) (else = 0)

* Senator length of term (code 6 scored 1)
tabulate pk2
recode pk2 (6 = 1) (. = .) (else = 0)

* John Boehner's position (encoded code 4 scored 1)
tabulate job_boehner
encode job_boehner, gen(pk3)
recode pk3 (4 = 1) (. = .) (else = 0)
tabulate pk3

* David Cameron's position (encoded code 2 scored 1)
tabulate job_dc
encode job_dc, gen(pk4)
recode pk4 (2 = 1) (. = .) (else = 0)
tabulate pk4

* Elena Kagan's position (encoded code 2 scored 1)
tabulate job_ek
encode job_ek, gen(pk5)
recode pk5 (2 = 1) (. = .) (else = 0)
tabulate pk5

* Janet Napolitano's position (encoded code 2 scored 1)
tabulate job_jn
encode job_jn, gen(pk6)
recode pk6 (2 = 1) (. = .) (else = 0)

* Number of fatalities (encoded code 1 scored 1)
tabulate fatalities
encode fatalities, gen(pk7)
tabulate pk7, nolabel
recode pk7 (1 = 1) (. = .) (else = 0)

* Unemployment rate (encoded code 4 scored 1)
tabulate unemprate
encode unemprate, gen(pk8)
tabulate pk8, nolabel
recode pk8 (4 = 1) (. = .) (else = 0)

*Total knowledge (questions weight equally)
* Sum of the eight 0/1 items; missing if any single item is missing.
gen knowledge=pk1+pk2+pk3+pk4+pk5+pk6+pk7+pk8
tab knowledge

* Scale reliability and a one-factor model of the eight items; knowledge2 is
* the score predicted from that factor.
alpha pk1 pk2 pk3 pk4 pk5 pk6 pk7 pk8
factor pk1 pk2 pk3 pk4 pk5 pk6 pk7 pk8, factors(1)
predict knowledge2

tab knowledge2
corr knowledge2 knowledge

*rescaled polknowledge goes from 0 to 1
* The minimum and range are taken from the data instead of being hard-coded
* (the original used +2.820037 and /3.4373588, presumably copied from one run),
* so the result stays on 0-1 if the factor scores change.
quietly summarize knowledge2
generate polknowledge=(knowledge2-r(min))/(r(max)-r(min))
tab polknowledge

****************************
****************************
****************************************************************************
****************************************************************************
*** Figure 4 * Categorizing Victims by Industry*
****************************************************************************
* Experiment 1 treatment variable
* Each respondent answered one of three versions (a, b, c) of the describe
* question; encode the three string answers to numeric variables.
tabulate chm_exp1a_describe
tabulate chm_exp1b_describe
tabulate chm_exp1c_describe
encode chm_exp1a_describe, gen(chm_exp1a_describe2)
encode chm_exp1b_describe, gen(chm_exp1b_describe2)
encode chm_exp1c_describe, gen(chm_exp1c_describe2)

* Treatment 1 -> sex industry
* Treatment 2 -> exploitative environments
* Treatment 3 -> menial labor

* The treatment code is the version that has a non-missing answer. If more
* than one version were answered, the later version wins (c over b over a).
gen exp1_treatment = cond(chm_exp1c_describe2 != ., 3, ///
                     cond(chm_exp1b_describe2 != ., 2, ///
                     cond(chm_exp1a_describe2 != ., 1, .)))
tabulate exp1_treatment

** Dependent variable 1 (How would you describe these individuals?)
* The three version-specific answers are strings; joining them with + gives
* one string per respondent, which is then encoded to a numeric variable.
gen chm_exp1_describe = chm_exp1a_describe + chm_exp1b_describe + chm_exp1c_describe
encode chm_exp1_describe, gen(chm_exp1_describe2)
tabulate chm_exp1_describe2, nolabel

* exp1_dv copies the encoded answer and gets its own value labels.
* NOTE(review): the labels assume encode produced exactly these three
* categories in this order -- confirm against the tabulation above.
gen exp1_dv = chm_exp1_describe2
label define exp1_dv 1 "Illegal immigrants" 2 "Other" 3 "Victims of traffickers"
label values exp1_dv exp1_dv
tabulate exp1_dv

** Analysis
* Distribution of answers within each treatment arm (sorts by exp1_treatment).
bysort exp1_treatment: tabulate chm_exp1_describe2


* NOTE(review): the command below is disabled. It refers to foreign, price,
* mpg and rep78, none of which is created anywhere in this do-file (they look
* like the variables of Stata's example auto data), and the data were last
* sorted by exp1_treatment, so running it would stop the do-file before the
* balance table and the final save.
* by foreign: eststo: quietly estpost summarize price mpg rep78, listwise
** Balance (Demographics) ******************************************************
set more off

* party_scale2 is built by the same rules as party_scale earlier in this
* do-file (dem1, then ind1, then rep1), so it is cloned from it; the copy
* carries no value label, which matches a freshly generated variable.
gen party_scale2 = party_scale

* Variable labels for the covariates passed to orth_out.
label variable age "Age"
label variable female "Female"
label variable householdincome "Household_Income"
label variable white "White"
label variable religiosity "Religiosity"
label variable party_scale2 "Party_Identification"
label variable knowledge "Political_Knowledge"

* Balance table by treatment arm, written to balance.csv.
* orth_out is a user-written command and must be installed separately.
orth_out age female householdincome white religiosity party_scale2 knowledge ///
    using balance.csv, replace by(exp1_treatment) pcompare proportion test


* Write the cleaned data in Stata format, overwriting any existing file.
save "Figure4_clean.dta", replace

* Write the same data as CSV with numeric codes instead of value labels.
* NOTE(review): the file name says Figure_D2 while this do-file is for
* Figure 4 -- confirm which name the plotting script expects.
export delimited using "Figure_D2.csv", replace nolabel
